package weibo

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/**
 * Spark job that prints the most frequent `#...#` topics found in the input text files.
 *
 * Each input line contributes at most one topic: the first `#...#` span on that line.
 * Lines that contain a `#` but no complete `#...#` span are ignored, so raw line text
 * never ends up being counted as a topic.
 */
object KeyWordTop {

  /** How many of the most frequent topics are printed. */
  private val TopN: Int = 10

  /**
   * Runs the job on a local Spark context and prints the `TopN` `(topic, count)` pairs
   * to standard output, highest count first.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = new SparkContext("local[*]", "KeyWordTop")

    // Always release the context, even when a stage fails.
    try {
      val inputPath: String = "file/WeiBoData/input"

      // Non-greedy so that only the first complete "#...#" span of a line is captured.
      val topicRegex = """#.*?#""".r

      val dataRdd: RDD[String] = sc.textFile(inputPath)
        .filter(_.contains("#")) // cheap pre-filter before the shuffle below
        .repartition(4)

      dataRdd
        // findFirstIn yields None for lines without a complete topic; those are dropped here.
        .flatMap(line => topicRegex.findFirstIn(line).map(topic => (topic, 1)).iterator)
        .reduceByKey(_ + _)
        // top() keeps only the TopN largest counts per partition instead of sorting everything;
        // the result is already in descending order of count.
        .top(TopN)(Ordering.by[(String, Int), Int](_._2))
        .foreach(println)
    } finally {
      sc.stop()
    }
  }
}
